/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2004 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

static const char __idstring[] = "@(#)$Id: mx_ether.c,v 1.24 2006/12/09 01:18:23 loic Exp $";

#include "mx_arch.h"
#include "mx_misc.h"
#include "mx_instance.h"
#include "mx_malloc.h"
#include "mx_pio.h"
#include "mx_peer.h"
#include "mx_ether_common.h"
#include "mx_stbar.h"
#include "bsd/queue.h"
#include <sys/strsubr.h>  	/* for hw cksum stuff */
#include <sys/pattr.h>		/* for hw cksum stuff */
#include <netinet/ip.h>		/* for hw cksum stuff */
#include <netinet/tcp.h>	/* for hw cksum stuff */
#include <netinet/udp.h>	/* for hw cksum stuff */

/*
 * Ceiling on the number of jumbo buffers used for the big receive ring;
 * the receive path stops growing the pool once the allocation count
 * (less the buffers carved up for the small ring) reaches this value.
 */
#define MX_BIGBUFS_MAX 4096

/* Module dependency list (Solaris _depends_on convention). */
char _depends_on[] = "misc/gld drv/mx_driver drv/mx_mcp";

/* Ethernet broadcast address, handed to GLD as gldm_broadcast_addr. */
unsigned char mx_ether_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/*
 * DMA attributes for receive (jumbo) buffers.  The scatter/gather list
 * length of 1 forces every receive buffer to be described by a single
 * DMA cookie.
 * NOTE(review): the address counter max (0x7ffffff) has one hex digit
 * fewer than the transfer/segment limits (0x7fffffff) -- confirm that
 * this is intentional.
 */
static ddi_dma_attr_t mx_rx_dma_attr = {
        DMA_ATTR_V0,            	/* version number. */
        (uint64_t)0, 			/* low address */
        (uint64_t)0xffffffffffffffffULL,/* high address */
        (uint64_t)0x7ffffff,		/* address counter max */
        (uint64_t)1,			/* alignment */
        (uint_t)0x7f,			/* burst sizes for 32 and 64 bit xfers */
        (uint32_t)0x1,			/* minimum transfer size */
        (uint64_t)0x7fffffff,		/* maximum transfer size */
        (uint64_t)0x7fffffff,		/* maximum segment size */
        1,                      	/* scatter/gather list length */
        1,                    		/* granularity */
        0                       	/* attribute flags */
};

/*
 * DMA attributes for transmit handles.  Identical to the receive
 * attributes except that a single bind may produce up to
 * MX_MCP_ETHER_MAX_SEND_FRAG cookies.
 */
static ddi_dma_attr_t mx_tx_dma_attr = {
        DMA_ATTR_V0,            	/* version number. */
        (uint64_t)0, 			/* low address */
        (uint64_t)0xffffffffffffffffULL,/* high address */
        (uint64_t)0x7ffffff,		/* address counter max */
        (uint64_t)1,			/* alignment */
        (uint_t)0x7f,			/* burst sizes for 32 and 64 bit xfers */
        (uint32_t)0x1,			/* minimum transfer size */
        (uint64_t)0x7fffffff,		/* maximum transfer size */
        (uint64_t)0x7fffffff,		/* maximum segment size */
        MX_MCP_ETHER_MAX_SEND_FRAG,     /* scatter/gather list length */
        1,                    		/* granularity */
        0                       	/* attribute flags */
};

/*
 * Driver-wide tunables (shared by every attached instance).
 *  mx_mtu                 - size in bytes of one receive buffer: frame plus
 *                           the MCP pad; may be overridden at attach time.
 *  mx_jbufs_per_nic       - jumbo buffers requested for the big ring when
 *                           an interface is started.
 *  mx_jumbo_not_supported - set to 1 at attach time when GLD refuses the
 *                           jumbo maximum packet size; start then fails.
 */
int mx_mtu = MX_MAX_ETHER_MTU + MX_MCP_ETHER_PAD;
int mx_jbufs_per_nic = NUM_RX*2;
int mx_jumbo_not_supported = 0;

/*
 * Due to various bugs in Solaris (especially bug 6186772, where the
 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
 * than two elements), and the design bug where hardware checksums are
 * ignored on mblk chains with more than two elements, we need to
 * allocate a private pool of physically contiguous receive buffers.
 */

/* Device access attributes defined elsewhere; used here when allocating receive buffer memory. */
extern struct ddi_device_acc_attr mx_dev_access_attr;

/*
 * Put the interface's jumbo buffer pool into its initial state:
 * all bookkeeping zeroed, an empty free list, and a freshly
 * created mutex protecting that list.
 */
static void
mx_jpool_init(struct mx_ether *eth)
{
	struct mx_jpool_stuff *pool;

	pool = &eth->arch.jpool;
	bzero(pool, sizeof(*pool));
	SLIST_INIT(&pool->head);
	mutex_init(&pool->mtx, NULL, MUTEX_DRIVER, NULL);
}

static void
mx_jpool_fini(struct mx_ether *eth)
{
	struct mx_jpool_stuff *jpool = &eth->arch.jpool;	

	if (!SLIST_EMPTY(&jpool->head)) {
		MX_WARN(("BUG! mx_jpool_fini called on non-empty pool\n"));
	}

	mutex_destroy(&jpool->mtx);
}



/*
 * Transfers buffers from the free pool to the nic.
 * Must be called holding the jpool mutex.
 *
 * Slots of the big receive ring are filled starting at rx->cnt and
 * stopping when either the free list runs dry or the ring would hold
 * NUM_RX buffers beyond eth->arch.j_rx_cnt (the count of big receives
 * already consumed by the host).  Descriptors are staged in
 * rx->shadow and pushed to the NIC in groups of four.
 */

static inline void
mx_restock_jumbos(struct mx_ether *eth)
{
	struct mx_jpool_stuff *jpool = &eth->arch.jpool;
	struct mx_jpool_entry *j;
	mx_ether_rx_buf_t *rx;
	int i, idx, limit;

	rx = &eth->rx_big;
	/* never post more than one full ring beyond what has been consumed */
	limit = eth->arch.j_rx_cnt + NUM_RX;

	for (i = rx->cnt; i != limit; i++) {
		/* NUM_RX - 1 is used as a mask to map the running count to a slot */
		idx = i & (NUM_RX - 1);
		j = SLIST_FIRST(&jpool->head);
		if (j == NULL)
			break;
		SLIST_REMOVE_HEAD(&jpool->head, entries);
		/* remember which buffer backs this slot, and stage its DMA address */
		rx->info[idx].j = j;
		rx->shadow[idx].addr_low = j->dma.low;
		rx->shadow[idx].addr_high = j->dma.high;

		/* copy 4 descriptors to the mcp at a time */
		if ((idx & 3) == 3) {
			/* 4 descriptors == 32 bytes for Z fast-writes */
			mx_pio_memcpy(&rx->ring[idx - 3], &rx->shadow[idx - 3],
				    4 * sizeof (*rx->ring), 0);
			MX_STBAR();
			/*
			 * NOTE(review): i is the index of the slot just
			 * filled, so the value written is one less than the
			 * number of slots filled so far -- confirm this is
			 * what the firmware expects.
			 */
			MX_PIO_WRITE(rx->lanai_cnt, htonl(i)); 
		}
	}
	/*
	 * Slots filled after the last complete group of four are counted
	 * here but are only copied to the NIC once a later call completes
	 * the group.
	 */
	rx->cnt = i;
}

/*
 * Transfer buffers from the nic to the free pool.
 * Should be called holding the jpool mutex
 */

static inline void
mx_unstock_jumbos(struct mx_ether *eth)
{
	struct mx_jpool_stuff *jpool = &eth->arch.jpool;
	struct mx_jpool_entry *j;
	mx_ether_rx_buf_t *rx;
	int i;

	rx = &eth->rx_big;

	for(i = 0; i < NUM_RX; i++) {
		j = rx->info[i].j;
		rx->info[i].j = NULL;
		if (j == NULL)
			continue;
		SLIST_INSERT_HEAD(&jpool->head, j, entries);
	}
}


/*
 * Free routine which is called when the mblk allocated via
 * esballoc() is freed.   Here we return the jumbo buffer
 * to the free pool, and possibly pass some jumbo buffers
 * to the nic
 */

static void
mx_jfree_rtn(void *arg)
{
	struct mx_jpool_entry *j = (struct mx_jpool_entry *)arg;
	struct mx_jpool_stuff *jpool;
	struct mx_ether *eth;
	int num_owned_by_mcp;
	
	eth = j->eth;
	jpool = &eth->arch.jpool;
	mutex_enter(&jpool->mtx);
	SLIST_INSERT_HEAD(&jpool->head, j, entries);
	num_owned_by_mcp = eth->rx_big.cnt - eth->arch.j_rx_cnt;
	if (num_owned_by_mcp < jpool->low_water)
		mx_restock_jumbos(eth);
	mutex_exit(&jpool->mtx);

}

/*
 * Create one jumbo receive buffer and put it on the free list.
 *
 * A DMA handle is allocated, mx_mtu bytes of DMA memory are obtained
 * and bound for device-to-memory streaming transfers, and the bus
 * address of the (single) cookie is recorded in network byte order.
 * The entry's esballoc free routine is set up to return the buffer
 * to this pool.
 *
 * Returns 0 on success, or the failing DDI call's status after
 * releasing everything acquired so far.
 */

static int
mx_add_jbuf(struct mx_ether *eth)
{
	struct mx_jpool_stuff *pool = &eth->arch.jpool;
	struct mx_jpool_entry *entry;
	ddi_dma_cookie_t dma_cookie;
	size_t alloc_len;
	uint_t ncookies;
	int rc;

	entry = (struct mx_jpool_entry *)kmem_alloc(sizeof (*entry), KM_SLEEP);

	rc = ddi_dma_alloc_handle(eth->is->arch.dip, &mx_rx_dma_attr,
				  DDI_DMA_DONTWAIT, NULL, &entry->dma_handle);
	if (rc != DDI_SUCCESS)
		goto free_entry;

	rc = ddi_dma_mem_alloc(entry->dma_handle, mx_mtu, &mx_dev_access_attr,
			       DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
			       &entry->buf, &alloc_len, &entry->acc_handle);
	if (rc != DDI_SUCCESS)
		goto free_handle;

	rc = ddi_dma_addr_bind_handle(entry->dma_handle, NULL,
				      entry->buf, alloc_len,
				      DDI_DMA_READ|DDI_DMA_STREAMING,
				      DDI_DMA_DONTWAIT, NULL,
				      &dma_cookie, &ncookies);
	if (rc != DDI_SUCCESS)
		goto free_mem;

	/* the descriptor fields are kept in network byte order */
	entry->dma.low = htonl(MX_LOWPART_TO_U32(dma_cookie.dmac_laddress));
	entry->dma.high = htonl(MX_HIGHPART_TO_U32(dma_cookie.dmac_laddress));
	entry->eth = eth;

	/* free routine invoked when an mblk wrapping this buffer is freed */
	entry->free_func.free_func = (void (*)()) mx_jfree_rtn;
	entry->free_func.free_arg = (char *)entry;

	mutex_enter(&pool->mtx);
	SLIST_INSERT_HEAD(&pool->head, entry, entries);
	mutex_exit(&pool->mtx);
	return 0;

free_mem:
	ddi_dma_mem_free(&entry->acc_handle);

free_handle:
	ddi_dma_free_handle(&entry->dma_handle);

free_entry:
	kmem_free(entry, sizeof (*entry));
	return rc;
}


/*
 * Try to add `num' jumbo buffers to the pool.  Individual allocation
 * failures are tolerated; the pool's allocation count is bumped by
 * the number that succeeded, and that number is returned.
 */
static int
mx_add_jbufs(struct mx_ether *eth, int num)
{
	struct mx_jpool_stuff *pool = &eth->arch.jpool;
	int got = 0;
	int remaining;

	for (remaining = num; remaining != 0; remaining--) {
		if (mx_add_jbuf(eth) == 0)
			got++;
	}

	mutex_enter(&pool->mtx);
	pool->num_alloc += got;
	mutex_exit(&pool->mtx);

	return got;
}

/*
 * Destroy a single jumbo buffer: undo the DMA binding, release the
 * DMA memory and handle, then free the pool entry itself.  The entry
 * must already be off every list.
 */
static void
mx_remove_jbuf(struct mx_jpool_entry *j)
{
	/* release in the reverse order of acquisition */
	ddi_dma_unbind_handle(j->dma_handle);
	ddi_dma_mem_free(&j->acc_handle);
	ddi_dma_free_handle(&j->dma_handle);

	kmem_free(j, sizeof (struct mx_jpool_entry));
}


static void
mx_remove_jbufs(struct mx_ether *eth) 
{
	mx_sync_t tmp_sync;
	struct mx_jpool_stuff *jpool = &eth->arch.jpool;
	struct mx_jpool_entry *j;
	int wait_time = 4; 

	mx_sync_init(&tmp_sync, eth->is, 0, "mx_remove_jbufs temp sync");

	while(wait_time) {
		mutex_enter(&jpool->mtx);
		while(!SLIST_EMPTY(&jpool->head)) {
			jpool->num_alloc--;
			j = SLIST_FIRST(&jpool->head);
			SLIST_REMOVE_HEAD(&jpool->head, entries);
			mx_remove_jbuf(j);
		}
		mutex_exit(&jpool->mtx);
		if (jpool->num_alloc == 0)
			break;
		printf("myri%d: waiting %d more seconds for %d jumbo bufs to be free'ed\n",
		       eth->is->id, wait_time, jpool->num_alloc);
		wait_time--;

		(void)mx_sleep(&tmp_sync, 1000, MX_SLEEP_NOINTR);
	}

	mx_sync_destroy(&tmp_sync);

}

/*
 * Populate the small receive ring by slicing jumbo buffers into
 * MX_SMALL_THRESH-byte pieces.
 *
 * Whenever the current jumbo buffer has less than one full piece
 * left, the next buffer is moved from the free list to
 * eth->arch.small_jpool_head (so it can be given back later) and
 * carving continues from its start.  For each ring slot the kernel
 * address is stored in info[].ptr and the bus address, in network
 * byte order, in shadow[].
 *
 * The caller must have made sure the free list holds enough buffers;
 * the list is not checked for exhaustion here.
 */
static void
mx_carve_up_jbufs_into_small_ring(struct mx_ether *eth)
{
	struct mx_jpool_stuff *jpool = &eth->arch.jpool;
	struct mx_jpool_entry *j = NULL;
	caddr_t ptr;
	uint32_t dma_low, dma_high;
	int idx, len;

	dma_low = dma_high = len = 0;
	ptr = NULL;
	for (idx = 0; idx < NUM_RX; idx++) {
		/* Allocate a jumbo frame and carve it into small frames */
		if (len < MX_SMALL_THRESH) {
			mutex_enter(&jpool->mtx);
			j = SLIST_FIRST(&jpool->head);
			SLIST_REMOVE_HEAD(&jpool->head, entries);
			SLIST_INSERT_HEAD(&eth->arch.small_jpool_head, j, entries);
			mutex_exit(&jpool->mtx);
			len = mx_mtu;
			/* work in host byte order while carving */
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		eth->rx_small.info[idx].ptr = ptr;
		/*
		 * dma_low and dma_high already are the two 32-bit halves
		 * of the bus address, so they are only byte-swapped here.
		 * Running the 32-bit dma_high through a "high part of a
		 * 64-bit value" macro would discard it.
		 */
		eth->rx_small.shadow[idx].addr_low = htonl(dma_low);
		eth->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= MX_SMALL_THRESH;
		ptr += MX_SMALL_THRESH;
		dma_low += MX_SMALL_THRESH;
	}
}

/*
 * Return the jumbo bufs we carved up for small to the jumbo pool
 */

static void
mx_release_small_jbufs(struct mx_ether *eth)
{
	struct mx_jpool_stuff *jpool = &eth->arch.jpool;
	struct mx_jpool_entry *j = NULL;

	mutex_enter(&jpool->mtx);
	while(!SLIST_EMPTY(&eth->arch.small_jpool_head)) {
		j = SLIST_FIRST(&eth->arch.small_jpool_head);
		SLIST_REMOVE_HEAD(&eth->arch.small_jpool_head, entries);
		SLIST_INSERT_HEAD(&jpool->head, j, entries);
	}
	mutex_exit(&jpool->mtx);
}


/*
 * Release the DMA handle of every slot in the send ring.
 */
static inline void
mx_unprepare_tx_ring(struct mx_ether *eth)
{
	int slot = 0;

	while (slot < NUM_TX) {
		ddi_dma_free_handle(&eth->arch.tx_handles[slot]);
		slot++;
	}
}

/*
 * Give every slot in the send ring its own DMA handle.
 *
 * Returns DDI_SUCCESS when all NUM_TX handles were allocated.  If any
 * allocation fails, the handles obtained so far are released again
 * and DDI_FAILURE is returned.
 */
static inline int
mx_prepare_tx_ring(struct mx_ether *eth)
{
	int slot, rc;

	for (slot = 0; slot < NUM_TX; slot++) {
		rc = ddi_dma_alloc_handle(eth->is->arch.dip,
					  &mx_tx_dma_attr,
					  DDI_DMA_DONTWAIT, NULL,
					  &eth->arch.tx_handles[slot]);
		if (rc == 0)
			continue;

		MX_WARN(("Failed to allocate tx DMA handles for ethernet\n"));
		/* roll back: free the handles of slots [0, slot) */
		while (slot > 0) {
			slot--;
			ddi_dma_free_handle(&eth->arch.tx_handles[slot]);
		}
		return DDI_FAILURE;
	}

	return DDI_SUCCESS;
}


/*
 * GLD gldm_reset entry point.  There is nothing to do here; success
 * is reported unconditionally.
 */
static int
mx_ether_reset(gld_mac_info_t *macinfo)
{
	(void)macinfo;	/* unused */

	return GLD_SUCCESS;
}

/*
 * GLD gldm_stop entry point: bring the ethernet interface down and
 * release everything mx_ether_start() set up.
 *
 * Only acts when the interface is running or a previous start failed;
 * otherwise GLD_FAILURE is returned.  The firmware is told to take
 * ethernet down, then the small-ring buffers, the tx DMA handles and
 * the jumbo buffers are released and the common close routine is run.
 */
static int 
mx_ether_stop(gld_mac_info_t *macinfo)
{
	struct mx_ether *eth;
	uint32_t dont_care;

	MX_WARN(("mx_ether_stop: called\n"));

	eth = (struct mx_ether *)(macinfo->gldm_private);	

	/* if the device not running give up */
	if (eth->running != MX_ETH_RUNNING &&
	    eth->running != MX_ETH_OPEN_FAILED)
		return GLD_FAILURE;
	
	eth->running = MX_ETH_STOPPING;
	mx_lanai_command(eth->is, MX_MCP_CMD_ETHERNET_DOWN,
			 0, 0, 0, &dont_care, &eth->cmd_sync);

	mx_release_small_jbufs(eth);
	mx_unprepare_tx_ring(eth);

	/*
	 * mblks wrapping jumbo buffers may still be freed by the stack
	 * at this point, which puts entries on the same free list via
	 * mx_jfree_rtn(); the list must therefore only be touched with
	 * the pool mutex held.
	 */
	mutex_enter(&eth->arch.jpool.mtx);
	mx_unstock_jumbos(eth);
	mutex_exit(&eth->arch.jpool.mtx);

	mx_remove_jbufs(eth);
	mx_ether_close_common(eth->is);
	eth->running = MX_ETH_STOPPED;

	return GLD_SUCCESS;
}


/*
 * GLD gldm_start entry point: allocate receive buffers and transmit
 * DMA handles, fill both receive rings and ask the firmware to bring
 * the ethernet interface up.
 *
 * Returns GLD_SUCCESS with eth->running set to MX_ETH_RUNNING, or
 * GLD_FAILURE.  When a failure happens after the common open
 * succeeded, everything acquired is released again and eth->running
 * is left at MX_ETH_OPEN_FAILED.
 */
static int 
mx_ether_start(gld_mac_info_t *macinfo)
{
	struct mx_ether *eth;
	int err, allocated, mx_big_pow2, jbufs_for_smalls, idx;
	uint32_t dont_care;

	if (mx_jumbo_not_supported == 1) {
		MX_WARN(("Jumbo frame not supported by GLD\n"));
		MX_WARN(("Please see /kernel/drv/myri.conf for a workaround\n"));
		return GLD_FAILURE;
	}
	MX_WARN(("mx_ether_start: called\n"));
	/* Allocate DMA resources and receive buffers */

	eth = (struct mx_ether *)(macinfo->gldm_private);	
	if (eth->running != MX_ETH_STOPPED)
		return GLD_FAILURE;

	/* Firmware needs the big buff size as a power of 2.  Lie and
	   tell him the buffer is larger, because we only use 1
	   buffer/pkt, and the mtu will prevent overruns */
	
	mx_big_pow2 = mx_mtu;
	while ((mx_big_pow2 & (mx_big_pow2 - 1)) != 0)
		mx_big_pow2++;
	err = mx_ether_open_common(eth->is, mx_mtu,
				   MX_SMALL_THRESH, mx_big_pow2);

	if (err) {
		MX_WARN(("myri%d: mx_ether_open_common() failed, errno = %d\n",
			 eth->is->id, err));
		goto abort_with_nothing;
	}

	eth->arch.stall = eth->arch.sched = 0;

	/* jumbo buffers that will be carved up to back the small ring */
	jbufs_for_smalls = 1 + NUM_RX / (mx_mtu / MX_SMALL_THRESH);

	allocated = mx_add_jbufs(eth, mx_jbufs_per_nic + jbufs_for_smalls);
	if (allocated < jbufs_for_smalls + 1) {
		MX_WARN(("myri%d: Could not allocate enough jumbo frames (%d/%d)\n",
			 eth->is->id, allocated, 
			 mx_jbufs_per_nic + jbufs_for_smalls));
		goto abort_with_jumbos;
	}

	if (allocated < mx_jbufs_per_nic)
		MX_WARN(("myri%d: Only allocated %d/%d jumbo frames\n",
			 eth->is->id, allocated, 
			 mx_jbufs_per_nic + jbufs_for_smalls));

	eth->arch.jpool.num_alloc = allocated;
	mx_carve_up_jbufs_into_small_ring(eth);
	eth->arch.j_rx_cnt = 0;
	if (allocated < NUM_RX)
		eth->arch.jpool.low_water = allocated/4;
	else
		eth->arch.jpool.low_water = NUM_RX/4;

	/* invalidate the big receive ring in case we do not
	   allocate sufficient jumbos to fill it */
	memset(eth->rx_big.shadow, 1, 
	       NUM_RX * sizeof (eth->rx_big.shadow[0]));

	for (idx = 3; idx < NUM_RX; idx += 4) {
		mx_pio_memcpy(&eth->rx_big.ring[idx - 3], 
			    &eth->rx_big.shadow[idx - 3],
			    4 * sizeof (*eth->rx_big.ring), 0);
		MX_STBAR();
	}

	/* the free list may only be manipulated with the pool mutex held */
	mutex_enter(&eth->arch.jpool.mtx);
	mx_restock_jumbos(eth);
	mutex_exit(&eth->arch.jpool.mtx);

	for (idx = 3; idx < NUM_RX; idx += 4) {
		mx_pio_memcpy(&eth->rx_small.ring[idx - 3],
			    &eth->rx_small.shadow[idx - 3],
			    4 * sizeof (*eth->rx_small.ring), 0);
		MX_STBAR();
	}

	err = mx_prepare_tx_ring(eth);

	/*
	 * The small ring has been carved by now, so its buffers have to
	 * be handed back to the pool before the pool can be emptied.
	 */
	if (err != 0)
		goto abort_with_smalls;

	/* Tell the MCP how many buffers he has, and to
	   bring the ethernet interface up */

	eth->rx_small.cnt = NUM_RX;
	MX_PIO_WRITE(eth->rx_small.lanai_cnt, htonl(eth->rx_small.cnt));
	MX_PIO_WRITE(eth->rx_big.lanai_cnt, htonl(eth->rx_big.cnt));
	
	/* somehow tell the mcp about this */
	err = mx_lanai_command(eth->is, MX_MCP_CMD_ETHERNET_UP,
			       0, 0, 0, &dont_care, &eth->cmd_sync);
	if (err) {
		MX_WARN(("myri%d: unable to start ethernet\n", eth->is->id));
		goto abort_with_tx;
	}
	mx_ether_start_common(eth->is, mx_mtu,
			      MX_SMALL_THRESH, mx_big_pow2);
	eth->running = MX_ETH_RUNNING;

	return GLD_SUCCESS;

abort_with_tx:
	mx_unprepare_tx_ring(eth);

abort_with_smalls:
	mx_release_small_jbufs(eth);

abort_with_jumbos:
	if (allocated != 0) {
		mutex_enter(&eth->arch.jpool.mtx);
		mx_unstock_jumbos(eth);
		mutex_exit(&eth->arch.jpool.mtx);
		mx_remove_jbufs(eth);
	}

	mx_ether_close_common(eth->is);
	eth->running = MX_ETH_OPEN_FAILED;

abort_with_nothing:

	return GLD_FAILURE;
}


/*
 * Attach the partial checksum computed by the NIC to a received mblk.
 *
 * mp->b_rptr must point at the ethernet header.  Only IPv4 frames
 * carrying TCP or UDP are handled; for anything else the mblk is left
 * untouched so the stack verifies the checksum in software.  The
 * start/stuff/end offsets handed to hcksum_assoc() are relative to the
 * beginning of the IP header.
 */
static inline void
mx_solaris_do_rx_csum(mblk_t *mp, uint32_t csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint32_t start, stuff, end;

	eh = (struct ether_header *) mp->b_rptr;

	/* the payload may only be interpreted as an IP header for IPv4 frames */
	if (ntohs(eh->ether_type) != ETHERTYPE_IP)
		return;

	ip = (struct ip *)(eh + 1);
	start = ip->ip_hl << 2;		/* IP header length in bytes */

	if (ip->ip_p == IPPROTO_TCP)
		stuff = start + offsetof(struct tcphdr, th_sum);
	else if (ip->ip_p == IPPROTO_UDP)	
		stuff = start + offsetof(struct udphdr, uh_sum);
	else {
		return;
	}
		
	end = ntohs(ip->ip_len);

	csum = ntohs((uint16_t)csum);

	(void)hcksum_assoc(mp, NULL, NULL, start, stuff, end,
			   csum, HCK_PARTIALCKSUM, 0);
}

/*
 * Receive completion for a frame that arrived in the small ring.
 *
 * The frame (including the MCP pad in front of it) is copied out of
 * the ring buffer into a newly allocated mblk, the pad is skipped,
 * checksum information is attached when the firmware flagged it, and
 * the mblk is passed to GLD.  If no mblk can be allocated the frame is
 * dropped.  In either case the updated receive count is written to
 * the nic.
 */
static void
mx_solaris_ether_rx_done_small(mx_instance_state_t *is, uint32_t count, 
			       uint32_t len, uint32_t csum, uint32_t flags)
{
	struct mx_ether *eth = is->ether;
	gld_mac_info_t *macinfo = eth->arch.macinfo;
	mblk_t *mp;
	int slot;

	slot = eth->rx_small.cnt & (NUM_RX - 1);
	eth->rx_small.cnt++;

	/* the data is copied, so the ring buffer can be reused right away */
	mp = allocb(len + MX_MCP_ETHER_PAD, 0);
	if (mp != NULL) {
		bcopy(eth->rx_small.info[slot].ptr,
		      (caddr_t)mp->b_wptr, len + MX_MCP_ETHER_PAD);
		mp->b_wptr += len + MX_MCP_ETHER_PAD;
		mp->b_rptr += MX_MCP_ETHER_PAD;

		if (flags & MX_MCP_ETHER_FLAGS_CKSUM) {
			mx_solaris_do_rx_csum(mp, csum);
		}

		gld_recv(macinfo, mp);
	}

	MX_PIO_WRITE(eth->rx_small.lanai_cnt, htonl(eth->rx_small.cnt));
}


/*
 * Receive completion for a frame that arrived in the big (jumbo) ring.
 *
 * Normally the jumbo buffer itself is wrapped in an mblk with
 * desballoc() and loaned to the stack; it returns to the pool through
 * mx_jfree_rtn() when the mblk is freed.  When fewer than 8 buffers
 * remain posted to the nic, the pool is first grown (up to
 * MX_BIGBUFS_MAX) and the nic restocked; if that is not enough the
 * frame is copied into an ordinary mblk so the jumbo buffer can be
 * recycled immediately.  If no mblk can be obtained the frame is
 * dropped and the buffer is returned to the pool.
 *
 * Panics if the ring slot being completed has no buffer attached.
 */
static void
mx_solaris_ether_rx_done_big(mx_instance_state_t *is, uint32_t count, 
			       uint32_t len, uint32_t csum, uint32_t flags)

{
	struct mx_ether *eth;
	gld_mac_info_t  *macinfo;
	struct mx_jpool_stuff *jpool;
	struct mx_jpool_entry *j;
	mblk_t *mp;
	int idx, num_owned_by_mcp, jbufs_for_smalls;

	eth = is->ether;
	macinfo = eth->arch.macinfo;
	jpool = &eth->arch.jpool;
	/* j_rx_cnt counts completed big receives; mask it down to a ring slot */
	idx = eth->arch.j_rx_cnt & (NUM_RX - 1);
	j = eth->rx_big.info[idx].j;

	if (j == NULL) {
		/* dump the ring state before panicking */
		printf("NULL: idx=%d, rx_big.cnt = %d, j_rx_cnt=%d, mcp=%d\n",
		       idx, eth->rx_big.cnt, eth->arch.j_rx_cnt,
		       (int)ntohl(*eth->rx_big.lanai_cnt));
		printf("DMA= 0x%x 0x%x\n",
		       eth->rx_big.shadow[idx].addr_high,
		       eth->rx_big.shadow[idx].addr_low);
		panic("j is null");
	}


	/* the slot no longer owns the buffer */
	eth->rx_big.info[idx].j = NULL;
        eth->arch.j_rx_cnt++;


	/* 
	 * Check to see if we are very low on jumbo buffers. 
	 * Note that we must leave at least 8 free so there are
	 * enough to free in a single 64-byte write.
	 * If we are, first try to hand the nic any buffers sitting
	 * in the free pool.   If we're still low, we must try to
	 * allocate a non-jumbo buffer to copy into, and hand the
	 * nic back the current jumbo buffer ASAP.
	 */
	
	num_owned_by_mcp = eth->rx_big.cnt - eth->arch.j_rx_cnt;
	if (num_owned_by_mcp < 8) {
		/* Try to allocate more if we are below the max  */
		jbufs_for_smalls = 1 + NUM_RX / (mx_mtu / MX_SMALL_THRESH);
		/* NOTE(review): num_alloc is read here without the pool mutex */
		if (jpool->num_alloc - jbufs_for_smalls < MX_BIGBUFS_MAX) {
			mx_add_jbufs(eth, 8);
			/* now feed them to the firmware */
			mutex_enter(&jpool->mtx);
			mx_restock_jumbos(eth);
			mutex_exit(&jpool->mtx);
			num_owned_by_mcp = eth->rx_big.cnt - eth->arch.j_rx_cnt;
		}
		/* if we are still low, then we have to copy */
		if (num_owned_by_mcp < 8) {
			MX_ETHER_STAT_INC(eth, rx_copy);
			/* allocate a new buffer to pass up the stack */
			mp = allocb(len + MX_MCP_ETHER_PAD, 0);
			if (mp == NULL) {
				goto abort;
			}
			bcopy(j->buf,
			      (caddr_t)mp->b_wptr, len + MX_MCP_ETHER_PAD);
			/* data has been copied out; recycle the jumbo buffer now */
			mx_jfree_rtn(j);
			goto did_copy;
		}
	}
	
	/* We have some free jumbo buffers, so we can
	   pass our buffer up without copying it */
	/*
	 * NOTE(review): no ddi_dma_sync() is issued before the buffer
	 * contents are read or loaned out -- confirm that this is not
	 * needed on the supported platforms.
	 */
	mp = desballoc((unsigned char *)j->buf, mx_mtu, 0, &j->free_func);
	if (mp == NULL) {
		goto abort;
	}

did_copy:


	/* skip the MCP pad, then set the data length to the frame length */
	mp->b_rptr += MX_MCP_ETHER_PAD;
	mp->b_wptr = ((unsigned char *) mp->b_rptr + len);


	if (flags & MX_MCP_ETHER_FLAGS_CKSUM) 
		mx_solaris_do_rx_csum(mp, csum);
		

	gld_recv(macinfo, mp);
	return;

abort:
	/* the frame is dropped; put the buffer back in the pool */
	mx_jfree_rtn(j);
}

/*
 * Transmit completion: release every send-ring slot from the current
 * "done" position up to (but not including) mcp_index, then restart
 * the GLD queue once for every stall that has not been rescheduled
 * yet.
 */
static void
mx_solaris_ether_tx_done(struct mx_instance_state *is, uint32_t mcp_index)
{
	struct mx_ether *eth = is->ether;
	mblk_t *done_mp;
	int slot;

	while (eth->tx.done != (int)mcp_index) {
		slot = eth->tx.done & (NUM_TX - 1);
		done_mp = eth->tx.info[slot].m;
		eth->tx.info[slot].m = 0;
		eth->tx.done++;

		/*
		 * Only the first slot used by each mblk carries the
		 * mblk pointer and a bound DMA handle; the remaining
		 * slots of that mblk are empty.
		 */
		if (done_mp == NULL)
			continue;

		ddi_dma_unbind_handle(eth->arch.tx_handles[slot]);
		freeb(done_mp);
	}

	/* wake the queue if the send path stalled it */
	for (; eth->arch.stall != eth->arch.sched; ) {
		eth->arch.sched++;
		gld_sched(eth->arch.macinfo);
	}
}

/*
 * Link state callback: report the instance's current link state to
 * GLD.  Nothing is reported unless an ethernet interface exists for
 * the instance and is running.
 */
static void
mx_solaris_ether_link_change(mx_instance_state_t *is)
{
	struct mx_ether *eth;
	uint32_t state;

	if (is == NULL || is->ether == NULL)
		return;

	eth = is->ether;
	if (eth->running != MX_ETH_RUNNING)
		return;

	/* any non-zero link_state counts as "up" */
	state = (is->link_state == 0) ? GLD_LINKSTATE_DOWN : GLD_LINKSTATE_UP;
	gld_linkstate(eth->arch.macinfo, state);
}

/*
 * GLD gldm_set_mac_addr entry point.  Changing the MAC address is
 * not supported.
 */
static int
mx_ether_set_mac_address(gld_mac_info_t *macinfo, unsigned char *macaddr)
{
	(void)macinfo;	/* unused */
	(void)macaddr;	/* unused */

	return GLD_NOTSUPPORTED;
}


/*
 * GLD gldm_set_multicast entry point.  Multicast address management
 * is not supported.
 */
static int
mx_ether_set_multicast(gld_mac_info_t *macinfo,
		       unsigned char *multicastaddr, int multiflag)
{
	(void)macinfo;		/* unused */
	(void)multicastaddr;	/* unused */
	(void)multiflag;	/* unused */

	return GLD_NOTSUPPORTED;
}


/*
 * GLD gldm_set_promiscuous entry point.  The request is accepted
 * without doing anything.
 */
static int
mx_ether_set_promiscuous(gld_mac_info_t *macinfo, int promiscflag)
{
	(void)macinfo;		/* unused */
	(void)promiscflag;	/* unused */

	return GLD_SUCCESS;
}


/*
 * pullupmsg() wrapper that preserves transmit checksum offload
 * information across the pullup.
 *
 * The checksum start/stuff offsets and flags are read from the mblk
 * before pullupmsg() is called and re-attached afterwards.  len is
 * passed straight to pullupmsg(); callers use -1 (whole message) or 6
 * (destination MAC address), and the matching statistic is bumped.
 * A pullup failure is only logged.
 */
static void
mx_pullupmsg(struct mx_ether *eth, mblk_t *mp, int len)
{
	/*
	 * hcksum_retrieve() only fills in start/stuff when the mblk
	 * carries partial-checksum information, so give them defined
	 * values for the hcksum_assoc() call below.
	 */
	uint32_t start = 0, stuff = 0, tx_offload_flags = 0;
	int ok;

	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, 
			NULL, &tx_offload_flags);

	ok = pullupmsg(mp, len);
	if (!ok)
		printf("pullupmsg failed\n");

	if (len == -1)
		MX_ETHER_STAT_INC(eth, xmit_pullup);
	else if (len == 6)
		MX_ETHER_STAT_INC(eth, xmit_pullup_first);
	else
		printf("mx_pullup called with wrong len %d\n", len);

	/* end and value are plain integers, not pointers */
	(void)hcksum_assoc(mp, NULL, NULL, start, stuff, 0,
			   0, tx_offload_flags, 0);
}


/*
 * GLD gldm_send entry point: try to send the chain of buffers
 * described by mp.
 *
 * A request list is built with one head descriptor followed by one
 * fragment descriptor per DMA cookie, at most
 * MX_MCP_ETHER_MAX_SEND_FRAG fragments in total.  Messages with too
 * many segments are pulled up into a single mblk first.
 *
 * Returns GLD_SUCCESS once the request has been submitted,
 * GLD_NORESOURCES when fewer than MX_MCP_ETHER_MAX_SEND_FRAG + 1 ring
 * slots are free (the stall counter is bumped so the tx-done path
 * reschedules the queue), or GLD_FAILURE when the message could not be
 * mapped even after one pullup retry.
 *
 * The whole operation runs with eth->arch.txlock held.
 */



static int
mx_ether_send(gld_mac_info_t *macinfo, mblk_t *mp)
{
	mcp_kreq_ether_send_t req_list[MX_MCP_ETHER_MAX_SEND_FRAG + 1];
	mcp_kreq_ether_send_t *req;
	struct mx_ether *eth;
	char *header;
	mblk_t  *bp;
	ddi_dma_cookie_t cookie;
	ddi_dma_handle_t dma_handle;
	int err, rv, count, idx, last_idx, avail, do_pullup, mblen;
	uint32_t start, stuff, tx_offload_flags, cum_len;
	uint_t ncookies;

	eth = (struct mx_ether *)(macinfo->gldm_private);	
	/* allow one retry with the message pulled up into a single mblk */
	do_pullup = 1;
	mutex_enter(&eth->arch.txlock);

again:
	req = req_list;

	/* leave an extra slot keep the ring from wrapping */
	avail = NUM_TX - 1 - (eth->tx.req - eth->tx.done);

	/* If we have > MX_MCP_ETHER_MAX_SEND_FRAG + 1, then any
	 * over-length message will need to be pulled up in 
	 * order to fit.  Otherwise, we are low on transmit
	 * descriptors, it is probably better to stall
	 * and try again rather than pullup a message to fit.
	 */
	 
	if (avail < MX_MCP_ETHER_MAX_SEND_FRAG + 1) {
		err = GLD_NORESOURCES;
		goto stall;
	}

	idx = eth->tx.req & (NUM_TX - 1);

	/* find out how long the frame is and how many segments it is*/
	cum_len = 0;
	count = 0;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = (int)(bp->b_wptr - bp->b_rptr);
		if (mblen == 0) {
			/* we can't simply skip over 0-length mblks
			   because the hardware can't deal with them,
			   and we could leak them.
			*/
			mx_pullupmsg(eth, bp, -1);
			mblen = (int)(bp->b_wptr - bp->b_rptr);
			if (mblen == 0) {
				/*
				 * NOTE(review): a zero-length mblk at the
				 * tail of the chain is skipped here, but
				 * the mapping loop below still tries to
				 * bind it -- confirm how that case is
				 * meant to be handled.
				 */
				if (bp->b_cont == NULL)
					continue;
				else
					panic("Couldn't fix 0 len\n");
			}
			MX_ETHER_STAT_INC(eth, xmit_zero_len);
		}
		cum_len += mblen;
		count++;
	}
	
	/* pull up excessively long chains into a single
	   segment we'll be able to DMA */
	if (count > MX_MCP_ETHER_MAX_SEND_FRAG) {
		mx_pullupmsg(eth, mp, -1);
	}

	/* Make sure we have at least the mac address, and
	   maybe the IP header in the first segment */

	mblen = (int)(mp->b_wptr - mp->b_rptr);
	if (mblen < 6) {
		mx_pullupmsg(eth, mp, 6);
	}

	/* Setup checksum offloading, if needed */
	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, 
			NULL, &tx_offload_flags);
	
	/* the first descriptor of the request is the head */
	req->head.flags = MX_MCP_ETHER_FLAGS_VALID | MX_MCP_ETHER_FLAGS_HEAD;
	req->head.pseudo_hdr_offset = 0;
	req->head.cksum_offset = 0;
	if (tx_offload_flags & HCK_PARTIALCKSUM) {
		/* offsets are shifted by the size of the ethernet header */
		uint16_t pseudo_hdr_offset = 
			stuff + sizeof(struct ether_header);
		uint16_t cksum_offset = start + sizeof(struct ether_header);

                req->head.pseudo_hdr_offset = htons(pseudo_hdr_offset);
                req->head.cksum_offset = htons(cksum_offset);
                req->head.flags |= MX_MCP_ETHER_FLAGS_CKSUM;
	}

        /* Get the destination mac address for the mcp.
           It is already in network byte order */

	header = (caddr_t)mp->b_rptr;
        req->head.dest_high16 = *(uint16_t *)&header[0];
	/*
	 * NOTE(review): header is a char pointer, so header[1] and
	 * header[2] are single bytes (the second and third bytes of the
	 * frame); dest_low32 therefore does not receive the last four
	 * bytes of the destination address -- confirm against what the
	 * firmware expects in this field.
	 */
	req->head.dest_low32 = header[1] << 16;
	req->head.dest_low32 |= header[2];
	
	req++;
	/* fragment slots start right after the head slot */
	idx = (idx + 1) & (NUM_TX - 1);
	
	for (count = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = (int)(bp->b_wptr - bp->b_rptr);
		/* save the dma handle for use when extracting cookies */
		dma_handle = eth->arch.tx_handles[idx];
		rv = ddi_dma_addr_bind_handle(dma_handle, NULL,
					      (caddr_t)bp->b_rptr, mblen,
					      DDI_DMA_WRITE | DDI_DMA_STREAMING, 
					      DDI_DMA_SLEEP, NULL,
					      &cookie, &ncookies);
		switch (rv) {
		case DDI_DMA_MAPPED:
			break;
		case DDI_DMA_NORESOURCES:
			/* do_pullup is left alone, so a retry is still possible */
			err = GLD_FAILURE;
			goto abort_with_mapped;
			break;
		default:
			/* any other bind failure is not retried */
			err = GLD_FAILURE;
			do_pullup = 0;
			goto abort_with_mapped;
			break;
		}

		if (eth->tx.info[idx].m != NULL) {
			panic("slot had lingering buf: %d/%d",
			      idx, eth->tx.req + count);
		}

		/* reserve this slot, and extract cookies */
		eth->tx.info[idx].m = bp;

		/* make sure all the cookies will fit */
		if (ncookies + count > MX_MCP_ETHER_MAX_SEND_FRAG) {
			/*
			 * point idx one past the slot just bound so the
			 * cleanup loop unbinds this handle too
			 */
			idx = (count + 2 + eth->tx.req) & (NUM_TX - 1);
			MX_ETHER_STAT_INC(eth, xmit_lowbuf);
			err = GLD_FAILURE;
			goto abort_with_mapped;
		}

		/* one fragment descriptor (and one ring slot) per cookie */
		while(1) {
			req->frag.addr_low = 
				htonl(MX_LOWPART_TO_U32(cookie.dmac_laddress));
			req->frag.addr_high = 
				htonl(MX_HIGHPART_TO_U32(cookie.dmac_laddress));
			req->frag.length = htons(cookie.dmac_size);
			req->frag.flags = MX_MCP_ETHER_FLAGS_VALID;
			count++;
			req++;
			idx = (idx + 1) & (NUM_TX - 1);
			ncookies--;
			if (ncookies == 0)
				break;
			ddi_dma_nextcookie(dma_handle, &cookie);
		}
	}

	/* terminate the request chain */
	req--;
	req->frag.flags |= MX_MCP_ETHER_FLAGS_LAST;

	/* Tell the lanai about it */
	mx_ether_submit_tx_req(eth, req_list, 1 + count);
	
	mutex_exit(&eth->arch.txlock);
	return GLD_SUCCESS;


abort_with_mapped:
	/* unbind every handle bound for this message, from tx.req up to idx */
	last_idx = idx;
	idx = eth->tx.req & (NUM_TX - 1);
	while (idx != last_idx) {
		bp = eth->tx.info[idx].m;
		eth->tx.info[idx].m = 0;
		if (bp)
			ddi_dma_unbind_handle(eth->arch.tx_handles[idx]);
		idx = (idx + 1) & (NUM_TX - 1);
	}
	if (do_pullup) {
		/* retry once with the whole message in a single mblk */
		mx_pullupmsg(eth, mp, -1);
		do_pullup = 0;
		goto again;
	}
stall:
	if (err == GLD_NORESOURCES) {
		/* counted so that the tx-done path calls gld_sched() */
		eth->arch.stall++;
	} else if (err != 0){
		printf("mx_ether_send xmit_err, errno = %d\n", err);
	}
	mutex_exit(&eth->arch.txlock);
	return err;
}


/*
 * GLD gldm_intr entry point.  Nothing is handled here; every
 * interrupt is reported as unclaimed.
 */
static uint_t
mx_ether_intr_hack(gld_mac_info_t *macinfo)
{
	(void)macinfo;	/* unused */

	return DDI_INTR_UNCLAIMED;
}


/*
 * GLD gldm_get_stats entry point.
 *
 * The reported speed is 2000 (scaled by 1000000) for every port whose
 * bit is set in the instance's link_state.  Full duplex is reported
 * whenever the resulting speed is non-zero.  Always succeeds.
 */
static int
mx_ether_get_stats(gld_mac_info_t *macinfo, struct gld_stats *stats)
{
	struct mx_ether *eth = (struct mx_ether *)(macinfo->gldm_private);
	mx_instance_state_t *is = eth->is;
	uint64_t speed = 0;
	int port;

	for (port = 0; port < (int)is->num_ports; port++) {
		if ((is->link_state & (1 << port)) != 0)
			speed += 2000;
	}

	stats->glds_speed = speed * 1000000LL;
	if (speed != 0)
		stats->glds_duplex = GLD_DUPLEX_FULL;

	return GLD_SUCCESS;
}

/*
 * Delete the interface's kstat, if one was created, and forget it.
 */
void
mx_ether_stat_destroy(struct mx_ether *eth)
{
	if (eth->arch.ksp != NULL) {
		kstat_delete(eth->arch.ksp);
		eth->arch.ksp = NULL;
	}
}

/*
 * kstat update callback.  Writes are rejected with EACCES; on a read
 * the stall, sched and jumbo-allocation counters are copied from the
 * driver state into the named kstat.
 */
static int
mx_ether_kstat_update(kstat_t *ksp, int rw)
{
	struct mx_ether_stat *named;
	struct mx_ether *eth;

	if (rw == KSTAT_WRITE) {
		return (EACCES);
	}

	eth = (struct mx_ether *)ksp->ks_private;
	named = (struct mx_ether_stat *)ksp->ks_data;

	named->rx_jumbo_cnt.value.ul = eth->arch.jpool.num_alloc;
	named->xmit_sched.value.ul = eth->arch.sched;
	named->xmit_stall.value.ul = eth->arch.stall;

	return 0;
}

/*
 * Create and install the "mx_ether" named kstat for this interface.
 * One unsigned-long entry is set up for every counter in struct
 * mx_ether_stat.  If the kstat cannot be created a warning is logged
 * and the interface simply runs without one.
 */
static void
mx_ether_stat_init(struct mx_ether *eth)
{
	struct mx_ether_stat *named;
	struct kstat *ks;

	ks = kstat_create("mx_ether", eth->is->id,
			  NULL, "net", KSTAT_TYPE_NAMED,
			  sizeof(*named) / sizeof(kstat_named_t), 0);
	if (ks == NULL) {
		MX_WARN(("kstat_create failed"));
		return;
	}

	eth->arch.ksp = ks;
	named = (struct mx_ether_stat *) (ks->ks_data);

	/* receive side */
	kstat_named_init(&named->rx_copy, "rx_copy", KSTAT_DATA_ULONG);
	kstat_named_init(&named->rx_jumbo_cnt, "rx_jumbo_cnt",
			 KSTAT_DATA_ULONG);

	/* transmit side */
	kstat_named_init(&named->xmit_zero_len, "xmit_zero_len",
			 KSTAT_DATA_ULONG);
	kstat_named_init(&named->xmit_pullup, "xmit_pullup",
			 KSTAT_DATA_ULONG);
	kstat_named_init(&named->xmit_pullup_first, "xmit_pullup_first",
			 KSTAT_DATA_ULONG);
	kstat_named_init(&named->xmit_lowbuf, "xmit_lowbuf",
			 KSTAT_DATA_ULONG);
	kstat_named_init(&named->xmit_stall, "xmit_stall",
			 KSTAT_DATA_ULONG);
	kstat_named_init(&named->xmit_sched, "xmit_sched",
			 KSTAT_DATA_ULONG);

	ks->ks_update = mx_ether_kstat_update;
	ks->ks_private = (void *) eth;
	kstat_install(ks);
}

/*
 * attach(9E) entry point: create the ethernet state for one MX
 * instance and register it with GLD.
 *
 * Only DDI_ATTACH is handled.  An optional "mx_mtu_override" property
 * (1500..9000) replaces the default MTU.  If GLD rejects the
 * registration, it is retried once with the standard ethernet MTU and
 * mx_jumbo_not_supported is set, which makes later start requests
 * fail.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE after undoing everything that
 * was set up, including the pointers published in the instance state.
 */
static int
myri_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int unit, err;
	mx_instance_state_t *is;
	struct mx_ether *eth;
	gld_mac_info_t *macinfo;
	int mx_mtu_override = 0;

	if (cmd != DDI_ATTACH)
		return DDI_FAILURE;

	unit = ddi_get_instance(dip);
	is = mx_get_instance(unit);
	if (is == NULL)
		return DDI_FAILURE;

	macinfo = gld_mac_alloc(dip);
	if (macinfo == NULL) {
		goto abort_with_instance;
	}

	/* a KM_SLEEP allocation waits for memory and does not return NULL */
	eth = (struct mx_ether *)kmem_zalloc(sizeof (*eth), KM_SLEEP);
	eth->is = is;
	is->ether = eth;
	eth->arch.macinfo = macinfo;

	/* See if the user wants to set an MTU */
	mx_mtu_override = 
		ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, "mx_mtu_override", mx_mtu_override);

	if (mx_mtu_override >= 1500 && mx_mtu_override <= 9000)
		mx_mtu = mx_mtu_override + sizeof (struct ether_header) + MX_MCP_ETHER_PAD;
	else if (mx_mtu_override != 0) {
		MX_WARN(("mx_mtu_override must be between 1500 and 9000 bytes\n"));
		mx_mtu_override = 0;
	}

	macinfo->gldm_private = (caddr_t)eth;
	macinfo->gldm_reset = mx_ether_reset;
	macinfo->gldm_start = mx_ether_start;
	macinfo->gldm_stop = mx_ether_stop;
	macinfo->gldm_set_mac_addr = mx_ether_set_mac_address;
	macinfo->gldm_set_multicast = mx_ether_set_multicast;
	macinfo->gldm_set_promiscuous = mx_ether_set_promiscuous;
	macinfo->gldm_get_stats	= mx_ether_get_stats;
	macinfo->gldm_send = mx_ether_send;
	macinfo->gldm_intr = mx_ether_intr_hack;
	macinfo->gldm_ioctl = NULL;

	/*
	 *	Initialize  GLD state
	 */
	macinfo->gldm_ident = "Myrinet Express ethernet";
	macinfo->gldm_type = DL_ETHER;
	macinfo->gldm_minpkt = 0;   
	/* mx_mtu includes the ethernet header and the MCP pad */
	macinfo->gldm_maxpkt = mx_mtu - sizeof (struct ether_header) - MX_MCP_ETHER_PAD;
	macinfo->gldm_addrlen = ETHERADDRL;
	macinfo->gldm_saplen = -2;

	/* Other required initialization */
	macinfo->gldm_ppa = unit;
	macinfo->gldm_vendor_addr = is->mac_addr;
	macinfo->gldm_broadcast_addr = mx_ether_broadcastaddr;
	macinfo->gldm_devinfo = dip;
	macinfo->gldm_cookie = is->arch.iblock_cookie;

	/* Claim to support HW checksums */
	macinfo->gldm_capabilities = 
		GLD_CAP_CKSUM_PARTIAL|GLD_CAP_ZEROCOPY|GLD_CAP_LINKSTATE;

	mutex_init(&eth->arch.txlock,
		   NULL, MUTEX_DRIVER, macinfo->gldm_cookie);
	mutex_init(&eth->arch.intrlock,
		   NULL, MUTEX_DRIVER, macinfo->gldm_cookie);

	/* callbacks the common MX code uses to reach this driver */
	is->arch.ether_tx_done = mx_solaris_ether_tx_done;
	is->arch.ether_rx_done_small = mx_solaris_ether_rx_done_small;
	is->arch.ether_rx_done_big = mx_solaris_ether_rx_done_big;
	is->arch.ether_link_change = mx_solaris_ether_link_change;

	eth->running = MX_ETH_STOPPED;

	mx_jpool_init(eth);

	err = gld_register(dip, "myri", macinfo);
	if (err != DDI_SUCCESS) {
		MX_WARN(("gld_register failed (err = %d); GLD may not like jumbo frames..\n", err));
		mx_jumbo_not_supported = 1;
		/* retry with the standard ethernet MTU */
		mx_mtu = ETHERMTU + sizeof (struct ether_header) + MX_MCP_ETHER_PAD;
		macinfo->gldm_maxpkt 
			= mx_mtu - sizeof (struct ether_header) - MX_MCP_ETHER_PAD;

		err = gld_register(dip, "myri", macinfo);
		if (err != DDI_SUCCESS) {
			MX_WARN(("GLD attach still failed\n"));
			goto abort_with_jpool;
		}
	}

	mx_ether_stat_init(eth);
	printf("myri_attach succeeds on unit %d\n", unit);
	return DDI_SUCCESS;
	
abort_with_jpool:
	/*
	 * Unpublish everything that points at eth before it is freed,
	 * so the instance state is not left with dangling pointers.
	 */
	is->arch.ether_tx_done = NULL;
	is->arch.ether_rx_done_small = NULL;
	is->arch.ether_rx_done_big = NULL;
	is->arch.ether_link_change = NULL;
	is->ether = NULL;

	mx_jpool_fini(eth);
	mutex_destroy(&eth->arch.txlock);
	mutex_destroy(&eth->arch.intrlock);
	kmem_free(eth, sizeof (*eth));
	gld_mac_free(macinfo);
	
abort_with_instance:
	mx_release_instance(is);
	return DDI_FAILURE;
}

/*
 * detach(9E) entry point for the "myri" GLD ethernet instance.
 *
 * Unregisters the MAC from GLD and then releases everything this file's
 * attach path set up: the jumbo pool, the tx/intr mutexes, the mx_ether
 * softstate, the GLD macinfo and the reference on the MX instance.
 *
 * Only DDI_DETACH is handled; any other command (e.g. DDI_SUSPEND) is
 * refused so that a suspend request can never free the instance.
 *
 * Returns DDI_SUCCESS on success, DDI_FAILURE for an unsupported command,
 * or the error returned by gld_unregister().  When gld_unregister() fails
 * the instance is left exactly as it was before the call.
 */
static int
myri_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	gld_mac_info_t *macinfo;
	struct mx_ether *ether;
	mx_instance_state_t *is;
	int err;

	if (cmd != DDI_DETACH)
		return DDI_FAILURE;

	macinfo = ddi_get_driver_private(dip);
	ether = (struct mx_ether *)(macinfo->gldm_private);
	is = ether->is;

	/*
	 * Hide the ethernet state from the MX instance before unregistering,
	 * so the instance stops referring to it while GLD tears the MAC down.
	 */
	is->ether = 0;
	is->arch.ether_link_change = 0;
	printf("myri_detach called\n");
	err = gld_unregister(macinfo);
	if (err != DDI_SUCCESS) {
		/*
		 * GLD refused to unregister, so the instance stays attached:
		 * put the hooks back instead of leaving it half torn down.
		 */
		is->arch.ether_link_change = mx_solaris_ether_link_change;
		is->ether = ether;
		return err;
	}

	/* Past this point the detach can no longer fail. */
	mx_ether_stat_destroy(ether);
	mx_jpool_fini(ether);
	mutex_destroy(&ether->arch.txlock);
	mutex_destroy(&ether->arch.intrlock);
	kmem_free(ether, sizeof (*ether));
	gld_mac_free(macinfo);
	
	mx_release_instance(is);
	printf("myri_detach succeeded\n");
	return DDI_SUCCESS;
}





/* Streams routines */

/*
 * STREAMS module_info for the "myri" driver.  The same structure is
 * referenced by both the read-side and write-side qinit tables below.
 * Packet size limits and the high-water mark are expressed in terms of
 * MX_MAX_ETHER_MTU; the high-water mark is scaled by NUM_TX.
 */
static  struct module_info mx_ether_minfo = {
        0,				/* mi_idnum */
        "myri",				/* mi_idname */
        0,				/* mi_minpsz */
        MX_MAX_ETHER_MTU,		/* mi_maxpsz */
        NUM_TX*MX_MAX_ETHER_MTU,	/* mi_hiwat */
        1,				/* mi_lowat */
};

/*
 * Read-side queue initialization.  There is no put procedure; service,
 * open and close are all delegated to the GLD framework routines.
 */
static  struct qinit mx_ether_rinit = {
        (int (*)()) NULL,	/* qi_putp */
        gld_rsrv,		/* qi_srvp */
        gld_open,		/* qi_qopen */
        gld_close,		/* qi_qclose */
        (int (*)()) NULL,	/* qi_qadmin */
        &mx_ether_minfo,	/* qi_minfo */
        NULL			/* qi_mstat */
};

/*
 * Write-side queue initialization.  Put and service are delegated to the
 * GLD framework routines; open and close are left NULL here because they
 * are supplied on the read side (mx_ether_rinit).
 */
static  struct qinit mx_ether_winit = {
        gld_wput,		/* qi_putp */
        gld_wsrv,		/* qi_srvp */
        (int (*)()) NULL,	/* qi_qopen */
        (int (*)()) NULL,	/* qi_qclose */
        (int (*)()) NULL,	/* qi_qadmin */
        &mx_ether_minfo,	/* qi_minfo */
        NULL			/* qi_mstat */
};

/*
 * STREAMS entry table tying together the read and write qinit structures.
 * The multiplexor slots are NULL, i.e. no mux read/write queues are
 * provided.
 */
static struct streamtab mx_ether_info = {
        &mx_ether_rinit,	/* st_rdinit */
        &mx_ether_winit,	/* st_wrinit */
        NULL,			/* st_muxrinit */
        NULL			/* st_muxwrinit */
};


/*
 * Character/block entry points.  All access goes through the STREAMS
 * table in cb_stream (mx_ether_info), so open/close are nulldev and the
 * remaining non-STREAMS entry points are nodev/nochpoll.
 */
static  struct cb_ops cb_mx_ether_ops = {
        nulldev,	/* cb_open */
        nulldev,	/* cb_close */
        nodev,		/* cb_strategy */
        nodev,		/* cb_print */
        nodev,		/* cb_dump */
        nodev,		/* cb_read */
        nodev,		/* cb_write */
        nodev,		/* cb_ioctl */
        nodev,		/* cb_devmap */
        nodev,		/* cb_mmap */
        nodev,		/* cb_segmap */
        nochpoll,	/* cb_chpoll */
        ddi_prop_op,	/* cb_prop_op */
        &mx_ether_info,	/* cb_stream */
        D_NEW|D_MP	/* cb_flag */
};

/*
 * Device operations for the driver.  Attach and detach are handled by
 * myri_attach()/myri_detach() in this file, getinfo is delegated to
 * gld_getinfo, and identify/probe are nulldev.  No bus_ops are supplied,
 * and no initializer is given for any member after devo_bus_ops.
 */
static  struct dev_ops mx_ether_ops = {
        DEVO_REV,		/* devo_rev */
        0,			/* devo_refcnt */
        gld_getinfo,		/* devo_getinfo */
        nulldev,		/* devo_identify */
        nulldev,		/* devo_probe */
        myri_attach,		/* devo_attach */
        myri_detach,		/* devo_detach */
        nodev,			/* devo_reset */
        &cb_mx_ether_ops,	/* devo_cb_ops */
        (struct bus_ops *)NULL,	/* devo_bus_ops */
};

/*
 * Loadable-driver linkage: ties the generic driver module operations
 * (mod_driverops) to this driver's dev_ops table.
 */
static struct modldrv modldrv = {
        &mod_driverops, 	/* drv_modops */
        "myri",			/* drv_linkinfo */
        &mx_ether_ops,  	/* drv_dev_ops */
};

/*
 * Module linkage handed to mod_install()/mod_remove()/mod_info() by the
 * _init/_fini/_info entry points below.  The linkage list holds the single
 * modldrv entry and is NULL-terminated.
 */
static struct modlinkage modlinkage = {
        MODREV_1, {&modldrv, NULL},
};

/*
 * Module load entry point: installs the driver via mod_install().
 *
 * Returns the mod_install() status unchanged.  The "installed" message is
 * logged only when installation actually succeeded; a failure is reported
 * with a warning instead of a misleading success message.
 */
int
_init(void)
{
	int     status;

	status = mod_install(&modlinkage);
	if (status == 0)
		MX_INFO(("mx_ether: installed\n"));
	else
		MX_WARN(("mx_ether: mod_install failed, status = %d\n",
			 status));
	return (status);
}

/*
 * Module unload entry point: removes the driver via mod_remove().
 *
 * Returns the mod_remove() status unchanged.  The "removed" message is
 * logged only when the module was really removed; if removal is refused
 * the module stays loaded, and a warning is logged instead.
 */
int
_fini(void)
{
	int     status;

	status = mod_remove(&modlinkage);
	if (status == 0)
		MX_INFO(("mx_ether: removed\n"));
	else
		MX_WARN(("mx_ether: mod_remove failed, status = %d\n",
			 status));
	return (status);
}

/*
 * Module information entry point: forwards the query to mod_info() for
 * this module's linkage and hands back whatever it returns.
 */
int
_info(struct modinfo *modinfop)
{
	int	status;

	status = mod_info(&modlinkage, modinfop);
	return (status);
}



/*
  This file uses MX driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/
